/*
Program written 04/27/2023
Written by teresa fort

Program analyzes how import trade is classified

*/

cd "xxxx"  /* PROJECT ROOT FOLDER */


global imp_type_input "data"
global imp_type "data"
global output "output"
global outv _07

di "$outv"


use "$imp_type_input/firm_imports_type_2007.dta", clear

tab short AR

*Check that firm inputs/outputs are always industry inputs/outputs
foreach var in input prod {
    gen check_`var'="yes" if f_`var'==1 & n_`var'==1
    replace check_`var'="no" if f_`var'==1 & n_`var'~=1
    }
tab check_input, miss
tab check_prod, miss  

*Make a type variable
gen type="firm input" if f_input==1
   replace type="ind input" if type=="" & n_input==1
   replace type="firm prod" if type=="" & f_prod==1
   replace type="ind prod" if type=="" & n_prod==1
   replace type="firm both" if f_input==1 & f_prod==1
   replace type="ind both" if f_input==. & f_prod==. & n_input==1 & n_prod==1
   replace type="firm input, ind prod" if f_input==1 & f_prod==. & n_prod==1
   replace type="ind input, firm prod" if f_input==. & f_prod==1 & n_input==1
   replace type="none" if f_input==. & f_prod==. & n_input==. & n_prod==.
     
   tab in_mat in_prod, miss 
   foreach var in cmf mat prod {
      replace in_`var'=0 if in_`var'==.
      }
   

 *NOTE: the industry measures do not rely on the plant being in the trailer data
 tab type in_prod
   
   gen type2=type
   replace type2="in mat" if type2=="none" & in_mat==1
   replace type2="in prod" if type2=="none" & in_prod==1
   replace type2="in both" if (type2=="in mat" | type2=="in prod") & in_mat==1 & in_prod==1
   replace type2="in cmf" if type2=="none" & in_cmf==1
   
   gen none=1 if type=="none"
   gen none_cmf=1 if type=="none" & in_cmf==1
   

tab type, miss
tab type2, miss
tab type2 if type2~="none"


  
*Program to make an excel file with summary stats by variable and RP status
capture program drop summ_stats
 program define summ_stats
     args byvar
   preserve  
   bys firmid `byvar': gen firms=1 if _n==1
   collapse (sum) value rp_value firms, by(`byvar')

   foreach var in value rp_value firms {
	egen tot_`var'=sum(`var')
	gen double share_`var'=round(`var'/tot_`var',0.001)
	}
   gen sh_rp=rp_value/value	
   gen imports=value/1000000
   label variable imports "Imports (\$M)"
   label variable firms "Firms"
   label variable share_value "Share of Imports"	
   label variable share_firms "Share of Firms"
   label variable share_rp_value "Share of RP Imports"	
   label variable sh_rp "RP Share"
  
  format share_value share_rp_value sh_rp share_firms %9.2fc
  export excel `byvar' imports firms share_value share_rp_value sh_rp share_firms  ///
    using $output/import_types$outv.xls, sheetreplace sheet(`byvar') ///
    firstrow(varlabels)
   restore 
   end
 summ_stats in_prod
 summ_stats in_cmf  
 summ_stats in_mat
 summ_stats type
 summ_stats type2
 summ_stats f_input
 summ_stats f_prod
 summ_stats n_input
 summ_stats n_prod
 summ_stats AR 
  
  
  
  

*Make a list of products based on whether they are potentially inputs or prods
******************************************************************************
  save "$imp_type/temp.dta", replace
  bys firmid hs n_input: gen firms=1 if _n==1
  collapse (sum) value firms, by(hs n_input)
  keep if n_input==1
  contract hs
  drop _freq
  save "$imp_type/hs_list.dta", replace
  
  use "$imp_type/temp.dta", clear
  bys firmid hs n_input: gen firms=1 if _n==1
  collapse (sum) value firms, by(hs n_prod)
  keep if n_prod==1
  contract hs
  drop _freq
  
  merge 1:1 hs using "$imp_type/hs_list.dta"
  gen code_status="input" if _merge==2
    replace code_status="prod" if _merge==1
    replace code_status="both" if _merge==3
  drop _merge
  save "$imp_type/hs_list.dta", replace
 
 use "$imp_type_input/firm_imports_type_2007.dta", clear
 collapse (sum) value, by(hs)
 
 merge 1:1 hs using "$imp_type/hs_list.dta"
   replace code_status="none" if code_status==""
   drop _merge 
   
   tab code_status
  save "$imp_type/hs_list.dta", replace
  
  collapse (sum) value, by(code)
  egen tot=sum(value)
  gen sh=value/tot
******************************************************************************  



*Output firm dataset
******************************************************************************  
use "$imp_type/temp.dta", clear

drop if substr(hs,1,2)=="27"

keep firmid hs country value rp_value naics* type* f_* n_* AR in_cmf in_mat in_prod

*rescale imports to 000s so they match the other data
foreach vv in value rp_value {
	replace `vv'=`vv'/1000
	label variable `vv' "Imports in $000s"
}



*Key variables to disclose
gen imp_bothf=value if f_input==1 & f_prod==1  // firm both
gen imp_nonef=value if f_input~=1 & f_prod~=1 // firm none
gen imp_inputf_np=value  if f_input==1 & f_prod~=1 // firm input not prod
gen imp_prodf_nif=value  if f_input~=1 & f_prod==1  // firm prod not input

gen rp_imp_bothf=rp_value if f_input==1 & f_prod==1  // firm both
gen rp_imp_nonef=rp_value if f_input~=1 & f_prod~=1 // firm none
gen rp_imp_inputf_np=rp_value  if f_input==1 & f_prod~=1 // firm input not prod
gen rp_imp_prodf_nif=rp_value  if f_input~=1 & f_prod==1  // firm prod not input

label variable rp_imp_bothf "Firm reports input and production"
label variable rp_imp_nonef "Firm does not report input or production"
label variable rp_imp_inputf_np "Firm reports input but not production"
label variable rp_imp_prodf_nif "Firm reports production but not input"


*make type variable to check coverage of key firm types
gen type_firms="Both" if f_input==1 & f_prod==1 
	replace type_firms="None" if f_input~=1 & f_prod~=1 
	replace type_firms="Input Only" if f_input==1 & f_prod~=1
	replace type_firms="Prod Only" if f_input~=1 & f_prod==1
	
*make a check variable
 egen tot_check=rowtotal(imp_bothf imp_nonef imp_inputf_np imp_prodf_nif)
 
 gen check=value-tot_check
 summ check

tab type_firms, miss

save "$imp_type/firm_imp_types_2007.dta", replace

******************************************************************************  
